# -*- coding: utf-8 -*-
import scrapy
from zc_core.client.mongo_client import Mongo
from scrapy import Request
from zc_core.dao.batch_dao import BatchDao
from zc_core.model.items import Box
from zc_core.pipelines.helper.catalog_helper import CatalogHelper
from zc_core.pipelines.helper.gov_cat_helper import GovCatHelper
from zc_core.util.http_util import retry_request
from zc_core.spiders.base import BaseSpider
from huiemall.rules import *
from scrapy.exceptions import IgnoreRequest


class FullSpider(BaseSpider):
    """Full crawl of supplier product listings on huiemall.com.

    For every supplier loaded from the ``supplier_pool`` Mongo collection the
    spider POSTs to the product-list endpoint for page 1, reads the total page
    count from that response, and then walks the remaining pages. Each
    non-empty page is emitted as a ``Box('item', batch_no, item_list)``.

    NOTE(review): ``self.batch_no``, ``self.error_back`` and ``self.logger``
    are not defined in this class; presumably they come from ``BaseSpider`` /
    ``scrapy.Spider`` -- confirm.
    NOTE(review): ``json``, ``parse_total_sku_page`` and the module-level
    ``parse_item_data`` are not imported by name in this file; presumably they
    arrive through ``from huiemall.rules import *`` -- confirm.
    """

    name = 'full'
    # Product-list REST endpoint (queried per supplier, per page).
    sku_list_url = 'http://www.huiemall.com/EpointMallHeFeiService/rest/spgys/getproducts'
    # Supplier goods page; sent in the request body as both "url" and
    # "referer". Placeholders: supplier id, page number.
    goods_page_url_tpl = 'http://www.huiemall.com/HeFei/gysgoods.html?gysguid={}&page={}&sort=&cid=&jnhb=&keyword='
    # Page size sent with every list request.
    list_page_size = "16"

    def __init__(self, batchNo=None, *args, **kwargs):
        """Create the batch record and load the supplier id -> name map.

        :param batchNo: batch identifier forwarded to ``BaseSpider``.
        """
        super(FullSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # Create the batch record.
        BatchDao().create_batch(self.batch_no)
        # Suppliers: Mongo ``_id`` -> ``name``.
        self.suppliers = {
            sp.get('_id'): sp.get('name')
            for sp in Mongo().list('supplier_pool')
        }

    def _build_list_request(self, sp_id, page, callback, priority):
        """Build the POST request for one page of a supplier's product list.

        :param sp_id: supplier id (sent as ``gysguid``).
        :param page: 1-based page number (int); stored as-is in ``meta``.
        :param callback: response handler for this request.
        :param priority: Scrapy scheduling priority.
        :return: the configured ``scrapy.Request``.
        """
        page_str = str(page)
        goods_page_url = self.goods_page_url_tpl.format(sp_id, page_str)
        return Request(
            method='POST',
            url=self.sku_list_url,
            headers={
                'Accept': 'application/json, text/javascript, */*; q=0.01',
                'Content-Type': 'application/json',
            },
            body=json.dumps({
                "gysguid": sp_id,
                "cid": "",
                "sort": "",
                "page": page_str,
                "size": self.list_page_size,
                "keyword": "",
                "jnhb": "",
                "url": goods_page_url,
                "referer": goods_page_url,
            }),
            meta={
                'reqType': 'item',
                'batchNo': self.batch_no,
                'spId': sp_id,
                'page': page,
            },
            callback=callback,
            errback=self.error_back,
            priority=priority,
        )

    def start_requests(self):
        """Yield the page-1 list request for every known supplier."""
        for sp_id in self.suppliers:
            yield self._build_list_request(sp_id, 1, self.parse_total_sku, 100)

    def parse_total_sku(self, response):
        """Read the total page count and fan out over the supplier's pages.

        The response handled here already is page 1, so its items are parsed
        directly instead of re-requesting the page: an identical POST
        (same URL and body) is dropped by Scrapy's default duplicate filter,
        which would silently lose the first page.

        :param response: page-1 list response for the supplier in
            ``response.meta['spId']``.
        """
        sp_id = response.meta.get('spId')

        # Total number of pages for this supplier.
        total = parse_total_sku_page(response)
        if not total:
            return

        self.logger.info('商品总页数: sp=%s, total=%s', sp_id, total)

        # Page 1: reuse the response we already have.
        for box in self.parse_item_data(response):
            yield box

        # Remaining pages of the supplier's products.
        for page in range(2, total + 1):
            yield self._build_list_request(sp_id, page, self.parse_item_data, 200)

    # Handle one page of the sku list.
    def parse_item_data(self, response):
        """Parse one list page and emit its items as a single ``Box``.

        :param response: list response; ``meta`` carries ``spId`` and ``page``.
        """
        meta = response.meta
        page = meta.get('page')
        sp_id = meta.get('spId')
        # Bare name resolves to the module-level helper, not this method.
        item_list = parse_item_data(response)
        if item_list:
            self.logger.info('清单: sp=%s, page=%s, cnt=%s', sp_id, page, len(item_list))
            yield Box('item', self.batch_no, item_list)
        else:
            # ``item_list`` may be None as well as empty; avoid len(None).
            self.logger.info('分页完成<sp=%s, page=%s, count=%s>', sp_id, page, len(item_list or ()))
